library(bigMap)
# auxiliary definitions for this data set
source("./mcsk15.R")
# input data: first 50 principal components, one row per observation
X <- as.matrix(read.csv("./mcsk15_data.csv.gz"))
# ./mcsk15/start.R
#
# Batch script: for every perplexity in `ppx.list`, run
# bdm.init -> bdm.ptsne -> bdm.efr -> bdm.knp -> bdm.hlCorr on `X`
# and save the list of results to './mcsk15_list.RData'.
library(bigMap)
X <- as.matrix(read.csv("./mcsk15_data.csv.gz"))
threads <- 40
# perplexities given as fractions of the number of rows of X
ppx.list <- round(nrow(X) * c(.005, .01, .05, .10, .20, .30, .40, .50), 0)
# +++ start MPI cluster
mpi.cl <- bdm.mpi.start(threads)
# A bare return() at top level is itself an error ("no function to return
# from"), so abort explicitly with a meaningful message instead.
if (is.null(mpi.cl)) {
  stop("bdm.mpi.start() returned NULL: no MPI cluster available", call. = FALSE)
}
# +++ run
# The `finally` clause guarantees the cluster is stopped even when one of the
# steps (or the save) fails; the error itself is still propagated.
tryCatch(
  {
    m.list <- lapply(ppx.list, function(ppx) {
      # +++ compute betas
      m <- bdm.init(X, dSet.name = "mck15", ppx = ppx, threads = threads, mpi.cl = mpi.cl)
      # +++ ptSNE
      m <- bdm.ptsne(NULL, m, lRate = NULL, theta = 0.0, threads = threads, mpi.cl = mpi.cl, layers = 2)
      # +++ EFR
      # NOTE(review): this result is overwritten by the next assignment before
      # it is ever read. Confirm whether the second call was meant to take
      # `m.efr` as input, or whether this call can be dropped.
      m.efr <- bdm.efr(NULL, list(m), ppx = ppx, iters = 100, threads = threads, mpi.cl = mpi.cl)
      # +++ EFR (ppx = 45)
      m.efr <- bdm.efr(NULL, list(m), ppx = 45, iters = 100, threads = threads, mpi.cl = mpi.cl)
      # +++ kNP (applied to every element of the list returned by bdm.efr)
      m.efr <- lapply(m.efr, function(m) bdm.knp(NULL, m, threads = threads, mpi.cl = mpi.cl))
      # +++ hlC
      m.efr <- lapply(m.efr, function(m) bdm.hlCorr(NULL, m, threads = threads, mpi.cl = mpi.cl))
      # one list of results per perplexity
      m.efr
    })
    save(m.list, file = "./mcsk15_list.RData")
  },
  finally = {
    # +++ stop cluster
    stopCluster(mpi.cl)
  }
)
Submit job:
$ qsub -pe make 20 -l h_vmem=4G Rsckt ./mcsk15/start.R
# load output of the batch run (restores `m.list`)
load("./mcsk15_list.RData")
# pt-SNE embedding: first element of each per-perplexity result list
m.list1 <- lapply(m.list, `[[`, 1)
# perplexity used for each embedding
sapply(m.list1, function(emb) emb$ppx$ppx)
## [1] 224 448 2240 4481 8962 13442 17923 22404
# one bdm.cost() call per embedding; the return values are not used further
nulL <- lapply(m.list1, function(emb) bdm.cost(emb))
mcsk15.legend()
# class labels
L <- mcsk15.lbls(l = 1)
# attach the labels to a local copy of each embedding and plot it
nulL <- lapply(m.list1, function(emb) {
  emb$lbls <- L
  bdm.ptsne.plot(emb, class.pltt = MACOSKO_COLORS1, ptsne.cex = 0.3)
})
# One-row table of the hl-Correlation summary per perplexity.
# vapply() pins the result to one number per embedding (sapply() would silently
# change shape on unexpected input).
# The 4th entry of summary() is the mean for a plain numeric vector
# (assumes m$hlC is one; TODO confirm).
hlTable <- vapply(m.list1, function(m) summary(m$hlC)[4], numeric(1))
hlTable <- matrix(hlTable, nrow = 1)
# columns are labelled with the perplexity of each embedding
colnames(hlTable) <- vapply(m.list1, function(m) m$ppx$ppx, numeric(1))
rownames(hlTable) <- "<hlC>"
# FALSE rather than F: F is an ordinary variable and can be reassigned
knitr::kable(hlTable, caption = "hl-Correlation") %>%
  kable_styling(full_width = FALSE)
| | 224 | 448 | 2240 | 4481 | 8962 | 13442 | 17923 | 22404 |
|---|---|---|---|---|---|---|---|---|
| <hlC> | 0.1328081 | 0.0965539 | 0.1739388 | 0.1886429 | 0.1988989 | 0.314965 | 0.9076846 | 0.8988457 |
Note the high hl-Correlation (~90%) for the highest perplexities (40% and 50% of the data set size), and the similarity between the corresponding embedding and the plot of the first two PCA components:
# PCA plot: scatter of the first two columns of X, coloured by class label L
# (per the data-loading comment, the columns of X are principal components)
plot(X[, 1], X[, 2], pch = 15, cex = 0.3, col = MACOSKO_COLORS1[L])
# pt-SNE (ppx=17923): 7th embedding in m.list1
m <- m.list1[[7]]
# attach the class labels so the plot is coloured with the same palette
m$lbls <- L
bdm.ptsne.plot(m, class.pltt = MACOSKO_COLORS1, ptsne.cex = 0.3)